package main

import (
	"bufio"
	"io"
	"log"
	"math/rand"
	"os"
	"strconv"
	"sync"
)

// Generation settings and the state shared between main, the generator
// goroutines and the merge loop.
var (
	// cpus is the number of generator goroutines started by main; it is also
	// the capacity of fileCh, so every generator can hand over its file
	// without blocking.
	cpus = 50

	// lineCount is the total number of loop iterations split evenly across
	// the generators (each generator receives lineCount/cpus).
	lineCount = 20000000

	// fileCh carries each finished temporary file from its generator to the
	// merge loop.
	fileCh = make(chan *os.File, cpus)

	// wg tracks the generator goroutines that are still running.
	wg sync.WaitGroup
)

// main recreates the ./data/ directory, starts cpus generator goroutines that
// each write one temporary file, and merges those files into ./data/data.txt
// as they are reported on fileCh.
func main() {
	if err := os.RemoveAll("./data/"); err != nil {
		log.Fatal(err)
	}
	// A directory needs the execute (search) bit for files to be created
	// inside it, so it must be 0755 rather than 0644.
	if err := os.MkdirAll("./data/", 0755); err != nil {
		log.Fatal(err)
	}
	file, err := os.OpenFile("./data/data.txt", os.O_CREATE|os.O_RDWR, 0644)
	if err != nil {
		log.Fatal(err)
	}

	for i := 0; i < cpus; i++ {
		wg.Add(1)
		go genChildFile(i, lineCount/cpus)
	}

	// Close the channel once every generator has finished so that the range
	// loop inside mergeFile terminates.
	go func() {
		wg.Wait()
		close(fileCh)
	}()

	mergeFile(file)

	// Close explicitly and check the error: a failed close can mean that the
	// merged data did not reach the disk.
	if err := file.Close(); err != nil {
		log.Fatal(err)
	}
}

// genChildFile generates the n-th temporary file, ./data/<n>.txt.
//
// It runs the generation loop `lines` times. Every iteration writes one random
// line, and every third iteration (i%3 == 0) writes that line a second time so
// that the file contains duplicates. When the file is complete it is closed
// and its handle is sent on fileCh; receivers should use the handle only for
// its Name. Any I/O error terminates the program via log.Fatal.
func genChildFile(n int, lines int) {
	defer wg.Done()

	tmpFile, err := os.OpenFile("./data/"+strconv.Itoa(n)+".txt", os.O_CREATE|os.O_RDWR, 0644)
	// Check the error before the handle is wrapped or used in any way.
	if err != nil {
		log.Fatal(err)
	}
	writer := bufio.NewWriterSize(tmpFile, 1024*1024)

	for i := 0; i < lines; i++ {
		line := randomString() + "\n"
		if _, err = writer.WriteString(line); err != nil {
			log.Fatal(err)
		}
		// Generate duplicate data.
		if i%3 == 0 {
			if _, err = writer.WriteString(line); err != nil {
				log.Fatal(err)
			}
		}
	}

	if err = writer.Flush(); err != nil {
		log.Fatalln(err)
	}
	// Close before announcing the file so the consumer never sees a file that
	// is still open for writing, and so that a close error is not lost.
	if err = tmpFile.Close(); err != nil {
		log.Fatal(err)
	}

	// Signal that this goroutine has finished generating its temporary file.
	fileCh <- tmpFile
}

// mergeFile appends every temporary file announced on fileCh to file and
// removes each temporary file once it has been copied. It returns after
// fileCh has been closed and drained. A failure to open, copy or remove a
// temporary file terminates the program via log.Fatal.
func mergeFile(file *os.File) {
	for tmpFile := range fileCh {
		name := tmpFile.Name()
		log.Println("open " + name)

		src, err := os.Open(name)
		// Check the open error here; otherwise it would be overwritten by the
		// result of io.Copy and reported as a misleading copy failure.
		if err != nil {
			log.Fatal(err)
		}
		if _, err = io.Copy(file, src); err != nil {
			log.Fatal(err)
		}
		// The source was opened read-only and has been fully copied, so a
		// close error cannot lose data; it is deliberately ignored.
		_ = src.Close()

		if err = os.Remove(name); err != nil {
			log.Fatal(err)
		}
	}
	log.Println("merge done")
}

// randomInt returns a pseudo-random integer in the half-open range
// [min, max). Like rand.Intn, it panics when max-min is not positive.
func randomInt(min, max int) int {
	offset := rand.Intn(max - min)
	return offset + min
}

// randomString returns a random string of ASCII letters that is between 10
// and 89 bytes long. Each byte is independently an uppercase or a lowercase
// letter with equal probability.
func randomString() string {
	chars := rand.Intn(90)
	// Guarantee a length of at least ten characters.
	if chars <= 10 {
		chars += 10
	}

	// The final length is known, so allocate the buffer once.
	b := make([]byte, 0, chars)
	for i := 0; i < chars; i++ {
		if rand.Intn(2) == 0 {
			// randomInt excludes its upper bound, so pass 'Z'+1 to make 'Z'
			// reachable.
			b = append(b, byte(randomInt('A', 'Z'+1)))
			continue
		}
		// Same reasoning for 'z'.
		b = append(b, byte(randomInt('a', 'z'+1)))
	}
	return string(b)
}
